# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import re
import plotly.express as px
import matplotlib.pyplot as plt
import os
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
# Load the Marvel character table from a CSV file in the working directory.
marvel = pd.read_csv("Marvel_Movies.csv")

# Bare expression: a notebook renders it as a preview of the DataFrame.
marvel
| name | ID | ALIGN | EYE | HAIR | SEX | GSM | ALIVE | APPEARANCES | FIRST APPEARANCE | Year | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Spider-Man (Peter Parker) | Secret Identity | Good Characters | Hazel Eyes | Brown Hair | Male Characters | NaN | Living Characters | 4043.0 | Aug-62 | 1962.0 |
| 1 | Captain America (Steven Rogers) | Public Identity | Good Characters | Blue Eyes | White Hair | Male Characters | NaN | Living Characters | 3360.0 | Mar-41 | 1941.0 |
| 2 | Wolverine (James \"Logan\" Howlett) | Public Identity | Neutral Characters | Blue Eyes | Black Hair | Male Characters | NaN | Living Characters | 3061.0 | Oct-74 | 1974.0 |
| 3 | Iron Man (Anthony \"Tony\" Stark) | Public Identity | Good Characters | Blue Eyes | Black Hair | Male Characters | NaN | Living Characters | 2961.0 | Mar-63 | 1963.0 |
| 4 | Thor (Thor Odinson) | No Dual Identity | Good Characters | Blue Eyes | Blond Hair | Male Characters | NaN | Living Characters | 2258.0 | Nov-50 | 1950.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 16371 | Ru'ach (Earth-616) | No Dual Identity | Bad Characters | Green Eyes | No Hair | Male Characters | NaN | Living Characters | NaN | NaN | NaN |
| 16372 | Thane (Thanos' son) (Earth-616) | No Dual Identity | Good Characters | Blue Eyes | Bald | Male Characters | NaN | Living Characters | NaN | NaN | NaN |
| 16373 | Tinkerer (Skrull) (Earth-616) | Secret Identity | Bad Characters | Black Eyes | Bald | Male Characters | NaN | Living Characters | NaN | NaN | NaN |
| 16374 | TK421 (Spiderling) (Earth-616) | Secret Identity | Neutral Characters | NaN | NaN | Male Characters | NaN | Living Characters | NaN | NaN | NaN |
| 16375 | Yologarch (Earth-616) | NaN | Bad Characters | NaN | NaN | NaN | NaN | Living Characters | NaN | NaN | NaN |
16376 rows × 11 columns
# Dimensions of the dataset as a (rows, columns) tuple.
marvel.shape
(16376, 11)
# Print a per-column summary: non-null counts and dtypes, plus memory usage.
marvel.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 16376 entries, 0 to 16375 Data columns (total 11 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 name 16376 non-null object 1 ID 12606 non-null object 2 ALIGN 13564 non-null object 3 EYE 6609 non-null object 4 HAIR 12112 non-null object 5 SEX 15522 non-null object 6 GSM 90 non-null object 7 ALIVE 16373 non-null object 8 APPEARANCES 15280 non-null float64 9 FIRST APPEARANCE 15561 non-null object 10 Year 15561 non-null float64 dtypes: float64(2), object(9) memory usage: 1.4+ MB
# Split every raw "Alias (Real Name)" entry of marvel['name'] into two columns:
#   marvel['nicknames'] - the text outside the first parenthesised group
#                         (e.g. "Spider-Man")
#   marvel['name']      - the text inside that group (e.g. "Peter Parker")

# Raw string: "\(" in a plain literal is an invalid escape sequence.
# "[^)]*" also matches newlines, so this accepts exactly the same text as the
# previous look-ahead based pattern.
_PAREN_GROUP = re.compile(r"\(([^)]*)\)")


def _split_alias(raw_name):
    """Return ``(alias, real_name)`` for one raw ``name`` cell.

    ``real_name`` is the content of the first ``(...)`` group and ``alias`` is
    what remains of the string once that group is cut out, with the leftover
    leading/trailing/double spaces collapsed.

    Entries without a parenthesised group (or non-string cells such as NaN)
    yield ``(nan, nan)``, matching how unmatched rows were treated before.
    """
    if not isinstance(raw_name, str):
        return np.nan, np.nan
    found = _PAREN_GROUP.search(raw_name)
    if found is None:
        return np.nan, np.nan
    # Cut out only the matched span; str.replace would also delete any later
    # identical "(...)" text elsewhere in the string.
    remainder = raw_name[:found.start()] + raw_name[found.end():]
    alias = " ".join(remainder.split())
    return alias, found.group(1)


_split_pairs = [_split_alias(raw_name) for raw_name in marvel.name]
# 'nicknames' is added first so the column order stays the same as before.
marvel['nicknames'] = [alias for alias, _ in _split_pairs]
marvel['name'] = [real_name for _, real_name in _split_pairs]
marvel
| name | ID | ALIGN | EYE | HAIR | SEX | GSM | ALIVE | APPEARANCES | FIRST APPEARANCE | Year | nicknames | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Peter Parker | Secret Identity | Good Characters | Hazel Eyes | Brown Hair | Male Characters | NaN | Living Characters | 4043.0 | Aug-62 | 1962.0 | Spider-Man |
| 1 | Steven Rogers | Public Identity | Good Characters | Blue Eyes | White Hair | Male Characters | NaN | Living Characters | 3360.0 | Mar-41 | 1941.0 | Captain America |
| 2 | James \"Logan\" Howlett | Public Identity | Neutral Characters | Blue Eyes | Black Hair | Male Characters | NaN | Living Characters | 3061.0 | Oct-74 | 1974.0 | Wolverine |
| 3 | Anthony \"Tony\" Stark | Public Identity | Good Characters | Blue Eyes | Black Hair | Male Characters | NaN | Living Characters | 2961.0 | Mar-63 | 1963.0 | Iron Man |
| 4 | Thor Odinson | No Dual Identity | Good Characters | Blue Eyes | Blond Hair | Male Characters | NaN | Living Characters | 2258.0 | Nov-50 | 1950.0 | Thor |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 16371 | Earth-616 | No Dual Identity | Bad Characters | Green Eyes | No Hair | Male Characters | NaN | Living Characters | NaN | NaN | NaN | Ru'ach |
| 16372 | Thanos' son | No Dual Identity | Good Characters | Blue Eyes | Bald | Male Characters | NaN | Living Characters | NaN | NaN | NaN | Thane (Earth-616) |
| 16373 | Skrull | Secret Identity | Bad Characters | Black Eyes | Bald | Male Characters | NaN | Living Characters | NaN | NaN | NaN | Tinkerer (Earth-616) |
| 16374 | Spiderling | Secret Identity | Neutral Characters | NaN | NaN | Male Characters | NaN | Living Characters | NaN | NaN | NaN | TK421 (Earth-616) |
| 16375 | Earth-616 | NaN | Bad Characters | NaN | NaN | NaN | NaN | Living Characters | NaN | NaN | NaN | Yologarch |
16376 rows × 12 columns
# Give rows with a missing SEX value an explicit label so that they are
# counted as their own category instead of being dropped by value_counts().
marvel['SEX'] = marvel['SEX'].fillna("Не определён")

# Number of characters per SEX category, shown as a bar chart.
gendre = marvel['SEX'].value_counts()
px.bar(
    gendre,
    text_auto='.4s',
    labels={'index': 'Genders', 'variable': 'Quantity'},
)
# Distinct values present in the SEX column.
marvel.SEX.unique()
array(['Male Characters', 'Female Characters', 'Genderfluid Characters',
'Agender Characters', 'Не определён'], dtype=object)
# Share of rows labelled "Male Characters", expressed as a percentage of all rows.
male_rows = marvel[marvel['SEX'] == "Male Characters"]
len(male_rows) / len(marvel) * 100
# Percentage of characters in each SEX category: printed one line per
# category, then plotted from largest to smallest.
sex_column = marvel['SEX']
genderValues = sex_column.unique()  # categories in order of first appearance
# Count every category in a single pass instead of filtering the whole
# DataFrame once per category.
category_counts = sex_column.value_counts()

print("Процентаж персонажей по полу")
# Build the Series in one go; growing it with pd.concat inside the loop
# re-copies the accumulated data on every iteration.
# A plain dict keeps the Series and its index unnamed, which the
# 'index'/'variable' label keys below rely on.
gendrePercentage = pd.Series(
    {v: category_counts.get(v, 0) / len(marvel) * 100 for v in genderValues},
    dtype='float64',
)
for v, p in gendrePercentage.items():
    print(f"{v}: {round(p, 2)} %")

px.bar(
    gendrePercentage.sort_values(ascending=False),
    text_auto='.4s',
    labels={'index': 'Genders', 'variable': 'Percentage'},
)
Процентаж персонажей по полу Male Characters: 71.07 % Female Characters: 23.43 % Genderfluid Characters: 0.01 % Agender Characters: 0.27 % Не определён: 5.21 %
# Distinct values present in the ALIVE column (a missing value shows up as nan).
marvel.ALIVE.unique()
array(['Living Characters', 'Deceased Characters', nan], dtype=object)
# Fraction (0..1) of all rows that are living, deceased, or have no ALIVE value.
row_count = len(marvel)
status_masks = (
    ("ALIVE", marvel['ALIVE'] == "Living Characters"),
    ("Deceased Characters", marvel['ALIVE'] == "Deceased Characters"),
    ("Unknown status", marvel['ALIVE'].isnull()),
)
for label, mask in status_masks:
    print(f"{label} - {len(marvel[mask]) / row_count}")
ALIVE - 0.7699071812408402 Deceased Characters - 0.22990962383976551 Unknown status - 0.00018319491939423546
# Number of rows per Year value, ordered chronologically, drawn as a line chart.
year_counts = marvel['Year'].value_counts()
years = year_counts.sort_index()
px.line(years)
# Bar chart of the ten characters with the highest APPEARANCES value.
# nlargest() picks them by value; head(10) only gave the right rows as long
# as the CSV happened to be sorted by APPEARANCES in descending order.
top_characters = marvel.nlargest(10, 'APPEARANCES')
fig = px.bar(
    data_frame=top_characters,
    x='nicknames',
    y='APPEARANCES',
    title='ТОП - 10 персонажей по популярности',
    color='APPEARANCES',
    color_continuous_scale=["orange", "red", "green", "blue", "purple"],
)
fig.show()
# The five least frequent EYE values: counts sorted ascending, first five kept,
# drawn as horizontal bars.
eye_counts = marvel['EYE'].value_counts()
eye = eye_counts.sort_values().head()
px.bar(eye, orientation='h')
# Frequency of every HAIR value, most common first, drawn as a bar chart.
hair_counts = marvel['HAIR'].value_counts()
hair = hair_counts.sort_values(ascending=False)
px.bar(hair)
# Ten most frequent (EYE, HAIR) combinations, counted over rows with a non-null name.
marvel.groupby(['EYE', 'HAIR'])['name'].count().nlargest(10)
EYE HAIR
Brown Eyes Black Hair 824
Brown Hair 655
Blue Eyes Blond Hair 648
Black Hair 391
Brown Hair 294
Black Eyes Black Hair 257
Red Eyes No Hair 186
Green Eyes No Hair 153
Brown Eyes Bald 137
Blue Eyes White Hair 132
Name: name, dtype: int64
# Ten most frequent (EYE, HAIR) combinations, drawn as bar charts.
dfg = marvel.groupby(['EYE', 'HAIR']).count()['name'].nlargest(10)

# Move the EYE index level into columns so that, with subplots=True,
# each eye colour gets its own subplot with hair colours along the x axis.
hair_by_eye = dfg.unstack(level=0)
ax = hair_by_eye.plot(
    kind='bar',
    subplots=True,
    rot=40,
    figsize=(20, 20),
    layout=(6, 5),
)
plt.show()